# --- GPU / runtime environment ---
export CUDA_VISIBLE_DEVICES="0,1,2,3"
simu_gpu=32                        # GPU count used only in experiment naming
export HYDRA_FULL_ERROR=1          # show full hydra stack traces on failure

# --- Dataset selection ---
#dataname=ximalaya_redian_2T
dataname=chat_1500

# --- Checkpoint and directory layout ---
#checkpoint=/home/work_nfs15/asr_data/ckpt/origin_chinese_hubert/chinese_hubert_large.pt
checkpoint=/home/node27_tmpdata/xlgeng/pachong_10W_data/fairseq_data/hubert_feat/ximalaya_redian_2T_chat_1000/hubert_large_iter3_32gpu_1/checkpoint_last.pt
exp_dir=/home/node27_tmpdata/xlgeng/pachong_10W_data/fairseq_data/hubert_feat/${dataname}
data_dir=/home/node27_tmpdata/xlgeng/pachong_10W_data/fairseq_data/manifest/${dataname}/new2
exp_name=hubert_large_by_xlgeng
all_data=("train")                 # dataset splits to process
extract_layer=9                    # transformer layer to extract hidden features from
feat=${exp_name}_extract_layer_${extract_layer}
feat_dir=${exp_dir}/feature/${feat}
nj=40                              # number of parallel CPU jobs
num_gpu=4
num_node=1
km_dir=${exp_dir}/k-means/${feat}
world_size=4                       # distributed training world size
update_freq=4                      # gradient accumulation factor

# --- Pretrain (iteration 3) settings ---
# NOTE(review): every variable below is overwritten by the finetune section
# further down before it is ever used; kept so the pretrain stage can be
# re-enabled, but this block currently has no effect beyond the mkdir.
label_name=["km"]                  # literal string "[km]" — shell strips the quotes; brackets are NOT a list here
label_rate=50                      # label frame rate (Hz)
model_size=large
conf_name=hubert_large_librivox.yaml
exp_name=hubert_${model_size}_iter3_${simu_gpu}gpu_1
output_dir=${exp_dir}/${exp_name}
conf_dir=config/pretrain/
mkdir -p -- "$output_dir"          # quoted + '--': safe if the path ever contains spaces or starts with '-'


# --- Stage 16: finetune the third-iteration HuBERT checkpoint ---
# These assignments deliberately overwrite the pretrain settings above.
label_name=["ltr"]                 # literal string "[ltr]" — brackets are NOT a shell list
label_rate=50
model_size=large
conf_dir=config/finetune
conf_name=base_960.yaml
exp_name=hubert_${model_size}_iter3_${simu_gpu}gpu_chat1500_mine
finetune_dir=${exp_dir}/${exp_name}
label_dir=${data_dir}/txt          # NOTE(review): set but never passed to hydra below — presumably the yaml config supplies task.label_dir; confirm
mkdir -p -- "$finetune_dir"
echo "stage 16: Finetune the third iteration started @ $(date)"
# fairseq-hydra-train                                             \
configname=$conf_name              # reuse $conf_name instead of re-hardcoding base_960.yaml
# rm -rf $save
# phonesMatches_reduced
save=$finetune_dir

# Launch fairseq hydra training; world size / update_freq come from the
# variables defined at the top of the script instead of hard-coded 4s.
python fairseq_cli/hydra_train.py \
    task.data="$data_dir" \
    common.seed=1337 \
    common.log_file="$save/train.json" \
    common.tensorboard_logdir="$save/tensorboard" \
    checkpoint.save_dir="$save" \
    distributed_training.distributed_world_size="$world_size" \
    optimization.update_freq="[${update_freq}]" \
    model.w2v_path="$checkpoint" \
    --config-dir "$conf_dir" \
    --config-name "$configname"
echo "stage 16: Done @ $(date)"    # was "stage 15" — fixed to match the start message above

